# initialisation code copied on file GEO872-session-01-solution.rmd

# packages:

## Default repository
local({r <- getOption("repos")
       r["CRAN"] <- "http://cran.r-project.org" 
       options(repos=r)
})

check_pkg <- function(x)
  {
    if (!require(x,character.only = TRUE, quietly = TRUE))
    {
      install.packages(x,dep=TRUE)
        if(!require(x,character.only = TRUE, quietly = TRUE)) stop("Package not found")
    }
}

# Call check_pkg() to install/load the required packages. 
check_pkg("here")
check_pkg("sf")
check_pkg("readr") 
check_pkg("ggplot2")
check_pkg("dplyr")
check_pkg("lubridate") #for timezones
check_pkg("raster")
check_pkg("leaflet")
# Machine learning
check_pkg("caTools")
check_pkg("class")
check_pkg("forecast")
check_pkg("MLmetrics")
check_pkg("randomForest")

#https://cran.r-project.org/web/packages/rmdwc/rmdwc.pdf
check_pkg("rmdwc")
#rmdcountAddin()

#DATA FOLDER
#On Martine laptop:
# data_folder <- "D:/Documents/Master/UZH/cours/GEO880_Computational Movement Analysis/Project/FS23_880Project_G3/data"
#On Yelu laptop:
data_folder <- "D:/GEO880/Project"

Criteria:

new length: Length of report (approx. 15000 char (incl. spaces, incl. References list, excl. Code listing), 20000 char max)

Report your work in a written project report. The report has two functions:
– It shall serve you as documentation of what you did, such that at a later stage you can use the report in one of your own projects.
– It will be used to evaluate and mark your project.

Your report shall: – cover how you went about investigating your research questions. Describe your data science ideas and how you implemented your ideas. – present the results of your study and discuss them in the light of your research questions. What have you achieved and what would be further steps for future research? – report problems and limitations you encountered along the way and the solutions you chose to overcome these, be it limitations with respect to the data sources, the tools or any other source of limitation. – discuss your data science choices in the light of the theory covered in the lectures, group works, and your reading assignments.

Good example: https://fbilan.github.io/GEO880_2020/Project_GEO880_Wysling_Biland.html

Introduction

Research questions

  1. Do the characteristics of movement trajectories differ beetween different travel modes?
  2. Could the characteristics be used to identify travel mode?
  3. How to identify different travel modes with movement trajectories? Could a identification model be built?
  4. How accurate and efficient could different travel modes be identified based on the aforementioned identification model?

Background

Literature review - list of travel modes considered in our study. - characteristics specific to the mean of transport

Methodology

First the trajectories data will be prepared to be fed for machine learning: data cleaning, labelizing, values extracted.

Labels list: boat, train, etc.

Preprocessing of trajectories data

Posmo data

  • 30 March to 18 June (~169 days)
  • clean GPS data and add basic common columns attributes
  • labelize data
  • overview (here or maybe later?) …

Import, clean posmo data

#import and convert to sf object

posmo <- read_delim(here(data_folder, "posmo_2023-06-18.csv"), ",",show_col_types = FALSE) %>% st_as_sf(coords = c("lon_x", "lat_y"), crs=4326, remove = FALSE) |> st_transform(2056)

#clean and add columns
posmo$source <- "posmo"

posmo <- posmo %>% 
       rename("tmode_posmo" = "transport_mode")

posmo$tmode_manual <- NA

to_remove <- c("place_name", "lon_x", "lat_y", "user_id")
posmo <- posmo[ , !(names(posmo) %in% to_remove)]

#add East and North columns
coords <- posmo |> st_coordinates()
posmo <- posmo |> 
  mutate(E =  coords[,1], N = coords[,2])


# Keep only Switzerland
ch_ll <- c(2460666, 1069416)
ch_ur <- c(2849000, 1300750)
posmo <- posmo %>% 
  filter((E > ch_ll[1] & E < ch_ur[1] & N > ch_ll[2] & N < ch_ur[2]))



#update timezone
posmo <- posmo %>%
  mutate(
    datetime = datetime %>% with_tz(tzone = "Europe/Zurich")
  )

# Add heights
# The height comes from the Digital height model DHM25 provided by swisstopo. The raster has been preprocessed on QGIS to be converted into the epsg 2056 (previously 21781) to save calculation time.
# The resolution is 25m. To have more precise information, we could look at the swiss3DAlti if needed.  
# Simplification: we did not consider height of soil compared to height of person or public transport.  
# As it is a relative variable to calculate slope, it does not matter.

#Extract values from rasters:
#https://gisday.wordpress.com/2014/03/24/extract-raster-values-from-points-using-r/comment-page-1/
dhm25 <- raster(here(data_folder, "dhm25_raster", "dhm25_2056.tif"))

rasValue <- extract(dhm25, posmo)
posmo$H <- rasValue


# #Adapt column order
# col_order <- c("source", "datetime", "weekday", "tmode_posmo", "tmode_manual" ,"E", "N", "H", "timelag", "steplength_m", "speed_ms", "slope", "geometry")
# posmo <- posmo[, col_order]
# posmo


# REMOVE:
# heights for plane
# rmeove outside of Switzerland (Freiburg and Basel)

Labelize posmo data

# Add column for travel mode
posmo$tmode_manual <- NA

#select boats segments
condition_boats <- posmo$datetime > as.POSIXct("2023-05-14 13:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:21:00", tz = "Europe/Zurich")

#select planes segments
condition_planes <- posmo$datetime > as.POSIXct("2023-06-17 11:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 12:22:00", tz = "Europe/Zurich")

#select cable car segments
condition_cable_cars <- 
  posmo$datetime > as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:18:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:10:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 12:27:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:18:10", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:24:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 13:11:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich")

# select ski lifts segments
condition_ski_lifts <- 
  posmo$datetime > as.POSIXct("2023-04-12 09:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 09:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:59:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 10:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:33:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:43:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:48:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:56:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich")

# select t-bar segments
condition_t_bars <- 
  posmo$datetime > as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich")

# select ski slopes segments
condition_skis <- 
  posmo$datetime > as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:51:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 09:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:48:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:17:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:25:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 15:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 15:59:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:24:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:28:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:36:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:43:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:48:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 09:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:42:30", tz = "Europe/Zurich")

#select kick scooters segments
condition_scooters <- 
  posmo$datetime > as.POSIXct("2023-05-04 07:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 07:42:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-24 22:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-24 19:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 20:00:00", tz = "Europe/Zurich")

#select bikes segments
condition_bikes <- 
  posmo$datetime > as.POSIXct("2023-05-02 08:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 08:36:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-02 11:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 12:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-02 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 18:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-15 15:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-15 16:32:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-01 07:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 07:53:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-01 18:38:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 18:52:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 08:00:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 17:32:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:35:00", tz = "Europe/Zurich")

#select train segments
condition_trains <- 
  posmo$datetime > as.POSIXct("2023-05-26 18:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:15:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-05-09 8:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 10:55:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-04-28 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:50:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-06-09 19:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 21:30:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-09 12:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:48:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-11 02:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 02:45:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-12 07:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-06-14 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 17:42:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 09:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:00:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 08:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:02:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-16 18:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 19:11:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-18 12:45:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 16:33:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:43:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 15:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 17:53:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-03-31 16:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:47:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-03 21:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 00:22:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-06 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 17:57:00", tz = "Europe/Zurich") 

#select trams segments
condition_trams <- 
  posmo$datetime > as.POSIXct("2023-06-09 17:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:22:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-09 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 13:54:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 11:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:23:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-05 18:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 19:03:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-05 23:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:57:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 10:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 18:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:16:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-04 16:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 17:01:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-04 19:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:41:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-05 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-05 08:52:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-06 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:50:00", tz = "Europe/Zurich") 

#select bus segments
condition_bus <- 
  posmo$datetime > as.POSIXct("2023-06-09 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:31:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-24 19:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 19:49:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:22:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-07 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:12:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 08:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:45:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 19:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 19:37:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 20:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 20:44:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 21:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 21:17:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:25:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 08:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:35:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-05 18:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 18:46:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-25 11:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-25 16:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 16:39:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-17 16:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:19:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-18 12:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-18 12:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-04 23:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:23:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 06:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 07:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:50:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-01 22:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 22:30:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 21:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 22:14:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-03-31 16:13:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:20:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-04 19:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:59:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-06 07:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:33:00", tz = "Europe/Zurich")

#select walking segments
condition_walks <- 
  posmo$datetime > as.POSIXct("2023-04-16 15:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:37:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-04-16 15:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:56:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-13 13:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 15:01:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-13 16:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:01:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-13 22:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:29:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-20 11:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 22:35:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-22 11:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 11:42:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-29 16:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 16:46:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-09 11:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:20:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-11 02:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 03:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 08:07:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-06 08:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:57:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 09:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-12 18:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 18:50:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 12:05:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-25 15:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 08:43:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 08:46:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-13 18:05:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 18:15:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:25:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:24:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 19:15:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-16 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 18:45:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-16 21:41:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 22:23:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-17 15:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:04:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-17 16:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:28:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-17 22:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 22:45:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:19:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:33:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 18:12:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-04 22:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 15:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 15:23:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 19:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 20:00:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-05-31 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 21:55:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-10 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-10 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 22:18:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-11 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 11:56:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-11 15:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 15:36:00", tz = "Europe/Zurich")

#select car segments
condition_cars <- 
  posmo$datetime > as.POSIXct("2023-06-11 15:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 17:00:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-04-24 14:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 17:30:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-10 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 17:33:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-09 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 22:30:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-14 07:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 08:00:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-14 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 18:10:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-13 18:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-13 19:15:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-15 14:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 21:41:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-15 07:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 08:04:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-16 23:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 23:40:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-17 14:34:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 14:42:30", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-04 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") | 
  posmo$datetime > as.POSIXct("2023-06-03 21:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-03 22:18:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-06-02 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 18:43:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-06 21:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 22:37:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-09 09:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-09 12:05:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-10 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 09:39:00", tz = "Europe/Zurich") |
  posmo$datetime > as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 21:10:00", tz = "Europe/Zurich")
  

#final labelizing
posmo <- posmo %>%
  mutate(tmode_manual = case_when(condition_walks ~ 'walk',
                                  condition_boats ~ 'boat',
                                  condition_planes ~ 'plane',
                                  condition_trains ~ 'train',
                                  condition_bikes ~ 'bike',
                                  condition_scooters ~ 'kick-scooter',
                                  condition_cars ~ 'car',
                                  condition_bus ~ 'bus',
                                  condition_trams ~ 'tram',
                                  condition_t_bars ~ 't_bar',
                                  condition_cable_cars ~ 'cable_car',
                                  condition_ski_lifts ~ 'ski_lift',
                                  condition_skis ~ 'ski',
                                  is.na(posmo$tmode_manual) == TRUE ~ 'unclassified'
                                  ))


# Remove heights for plane (bc completely false):
posmo$H[posmo$tmode_manual == "plane"] <- NA


# select columns
posmo <- dplyr::select(posmo, c("source", "datetime", "tmode_manual", "E", "N", "H", "geometry"))

GPS data

Data cleaning and basic processing

## Load the data
gps_data_raw <- read_delim(here(data_folder, "yelu_dataset_all.csv"), ";")

## Use right coordinate system and preserve original E/N columns
gps_data_raw <- st_as_sf(gps_data_raw, coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
gps_data <- gps_data_raw %>% st_transform(crs = 2056)

gps_data$Datetime <- as.POSIXct(paste(as.Date(gps_data$Date, format = '%d.%m.%Y'), gps_data$Time), format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
gps_data <- gps_data %>% mutate(Datetime = Datetime %>% with_tz(tzone = "Europe/Zurich"))

gps_data$source <- "gps"


# Extract E and N values
coords <- gps_data |> st_coordinates()
gps_data <- gps_data |> 
  mutate(Latitude =  coords[,2], Longitude = coords[,1])


# Rename columns
colnames(gps_data)[11] <- "datetime"
colnames(gps_data)[3] <- "N"
colnames(gps_data)[4] <- "E"
colnames(gps_data)[8] <- "H"

# Add column for travel mode
gps_data$tmode_manual <- NA


gps_data <- dplyr::select(gps_data, c("source", "datetime", "N", "E", "H", "geometry", "tmode_manual"))
# dataFolder   <- here::here("D:/GEO880/Project/")
# zurich_map <- st_read(file.path(dataFolder, "stadtkreis/stadtkreis/Stadtkreis.shp"))
# zurich_map <- st_transform(zurich_map, epsg = 2056)

Labelizing data

# Conditions for different travel modes



condition_bus <- 
  gps_data$datetime >= as.POSIXct("2023-04-21 16:18:35") & gps_data$datetime <= as.POSIXct("2023-04-21 16:23:40") | 
  gps_data$datetime >= as.POSIXct("2023-04-30 18:04:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:20:15") | 
  gps_data$datetime >= as.POSIXct("2023-04-30 19:46:20") & gps_data$datetime <= as.POSIXct("2023-04-30 19:59:25") |
  gps_data$datetime >= as.POSIXct("2023-05-10 19:31:25") & gps_data$datetime <= as.POSIXct("2023-05-10 19:34:55") |
  gps_data$datetime >= as.POSIXct("2023-05-10 22:43:00") & gps_data$datetime <= as.POSIXct("2023-05-10 22:56:05") |
  gps_data$datetime >= as.POSIXct("2023-05-12 07:46:00") & gps_data$datetime <= as.POSIXct("2023-05-12 07:51:20") |
  gps_data$datetime >= as.POSIXct("2023-05-26 07:45:20") & gps_data$datetime <= as.POSIXct("2023-05-26 07:50:25") |
  gps_data$datetime >= as.POSIXct("2023-06-05 14:59:35") & gps_data$datetime <= as.POSIXct("2023-06-05 15:09:00") |
  gps_data$datetime >= as.POSIXct("2023-06-05 15:17:55") & gps_data$datetime <= as.POSIXct("2023-06-05 15:27:40") 



condition_tram <- 
  gps_data$datetime >= as.POSIXct("2023-04-21 17:18:55") & gps_data$datetime <= as.POSIXct("2023-04-21 17:26:05") | 
  gps_data$datetime >= as.POSIXct("2023-04-25 15:07:50") & gps_data$datetime <= as.POSIXct("2023-04-25 15:11:55") | 
  gps_data$datetime >= as.POSIXct("2023-04-25 17:40:50") & gps_data$datetime <= as.POSIXct("2023-04-25 17:47:20") | 
  gps_data$datetime >= as.POSIXct("2023-04-25 18:57:55") & gps_data$datetime <= as.POSIXct("2023-04-25 19:04:12") | 
  gps_data$datetime >= as.POSIXct("2023-04-25 19:50:45") & gps_data$datetime <= as.POSIXct("2023-04-25 19:54:25") | 
  gps_data$datetime >= as.POSIXct("2023-04-25 20:26:10") & gps_data$datetime <= as.POSIXct("2023-04-25 20:32:05") | 
  gps_data$datetime >= as.POSIXct("2023-04-29 18:06:45") & gps_data$datetime <= as.POSIXct("2023-04-29 18:12:20") | 
  gps_data$datetime >= as.POSIXct("2023-04-30 16:45:05") & gps_data$datetime <= as.POSIXct("2023-04-30 16:56:30") | 
  gps_data$datetime >= as.POSIXct("2023-04-30 17:46:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:00:00") | 
  gps_data$datetime >= as.POSIXct("2023-04-30 20:10:35") & gps_data$datetime <= as.POSIXct("2023-04-30 20:12:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-01 12:56:05") & gps_data$datetime <= as.POSIXct("2023-05-01 13:04:10") | 
  gps_data$datetime >= as.POSIXct("2023-05-01 13:16:50") & gps_data$datetime <= as.POSIXct("2023-05-01 13:25:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-01 13:25:55") & gps_data$datetime <= as.POSIXct("2023-05-01 13:29:40") | 
  gps_data$datetime >= as.POSIXct("2023-05-01 19:28:40") & gps_data$datetime <= as.POSIXct("2023-05-01 19:30:35") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 13:42:15") & gps_data$datetime <= as.POSIXct("2023-05-06 13:54:15") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 13:58:00") & gps_data$datetime <= as.POSIXct("2023-05-06 14:04:35") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 15:51:20") & gps_data$datetime <= as.POSIXct("2023-05-06 15:59:40") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 16:07:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:18:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 16:27:00") & gps_data$datetime <= as.POSIXct("2023-05-06 16:33:40") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 17:13:15") & gps_data$datetime <= as.POSIXct("2023-05-06 17:20:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 17:32:15") & gps_data$datetime <= as.POSIXct("2023-05-07 17:34:00") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 18:09:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:26:55") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 18:43:35") & gps_data$datetime <= as.POSIXct("2023-05-07 18:46:25") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 19:41:25") & gps_data$datetime <= as.POSIXct("2023-05-07 19:44:50") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 19:47:25") & gps_data$datetime <= as.POSIXct("2023-05-07 20:09:15") | 
  gps_data$datetime >= as.POSIXct("2023-05-08 19:25:25") & gps_data$datetime <= as.POSIXct("2023-05-08 19:28:35") | 
  gps_data$datetime >= as.POSIXct("2023-05-08 19:49:45") & gps_data$datetime <= as.POSIXct("2023-05-08 19:53:25") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 17:16:45") & gps_data$datetime <= as.POSIXct("2023-05-10 17:28:45") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 17:52:15") & gps_data$datetime <= as.POSIXct("2023-05-10 17:53:15") | 
  gps_data$datetime >= as.POSIXct("2023-05-12 18:00:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:08:15") |
  gps_data$datetime >= as.POSIXct("2023-05-13 17:39:25") & gps_data$datetime <= as.POSIXct("2023-05-13 17:41:25") |
  gps_data$datetime >= as.POSIXct("2023-05-16 17:48:05") & gps_data$datetime <= as.POSIXct("2023-05-16 17:50:00") |
  gps_data$datetime >= as.POSIXct("2023-05-17 14:27:15") & gps_data$datetime <= as.POSIXct("2023-05-17 14:35:00") |
  gps_data$datetime >= as.POSIXct("2023-05-17 14:52:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:00:25") |
  gps_data$datetime >= as.POSIXct("2023-05-17 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-17 15:30:10") |
  gps_data$datetime >= as.POSIXct("2023-05-18 14:45:50") & gps_data$datetime <= as.POSIXct("2023-05-18 14:56:25") |
  gps_data$datetime >= as.POSIXct("2023-05-18 16:00:10") & gps_data$datetime <= as.POSIXct("2023-05-18 16:07:10") |
  gps_data$datetime >= as.POSIXct("2023-05-20 16:43:35") & gps_data$datetime <= as.POSIXct("2023-05-20 16:51:35") |
  gps_data$datetime >= as.POSIXct("2023-05-20 17:00:30") & gps_data$datetime <= as.POSIXct("2023-05-20 17:07:45") |
  gps_data$datetime >= as.POSIXct("2023-05-20 20:30:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:45:40") |
  gps_data$datetime >= as.POSIXct("2023-05-21 19:10:00") & gps_data$datetime <= as.POSIXct("2023-05-21 19:14:15") |
  gps_data$datetime >= as.POSIXct("2023-05-21 19:16:50") & gps_data$datetime <= as.POSIXct("2023-05-21 19:37:25") |
  gps_data$datetime >= as.POSIXct("2023-05-21 20:25:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:29:10") |
  gps_data$datetime >= as.POSIXct("2023-05-21 20:37:45") & gps_data$datetime <= as.POSIXct("2023-05-21 20:47:35") |
  gps_data$datetime >= as.POSIXct("2023-05-23 17:43:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:57:50") |
  gps_data$datetime >= as.POSIXct("2023-05-23 17:43:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:57:50") |
  gps_data$datetime >= as.POSIXct("2023-05-24 13:19:10") & gps_data$datetime <= as.POSIXct("2023-05-24 13:24:35") |
  gps_data$datetime >= as.POSIXct("2023-05-24 17:39:00") & gps_data$datetime <= as.POSIXct("2023-05-24 17:55:35") |
  gps_data$datetime >= as.POSIXct("2023-05-25 14:46:30") & gps_data$datetime <= as.POSIXct("2023-05-25 14:55:30") |
  gps_data$datetime >= as.POSIXct("2023-05-25 15:19:20") & gps_data$datetime <= as.POSIXct("2023-05-25 15:28:35") |
  gps_data$datetime >= as.POSIXct("2023-05-26 13:17:05") & gps_data$datetime <= as.POSIXct("2023-05-26 13:31:20") |
  gps_data$datetime >= as.POSIXct("2023-05-26 13:53:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:10:00") |
  gps_data$datetime >= as.POSIXct("2023-05-26 14:16:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:18:05") |
  gps_data$datetime >= as.POSIXct("2023-05-28 14:52:20") & gps_data$datetime <= as.POSIXct("2023-05-28 15:04:05") |
  gps_data$datetime >= as.POSIXct("2023-05-28 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-28 15:38:50") |
  gps_data$datetime >= as.POSIXct("2023-05-28 20:20:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:37:00") |
  gps_data$datetime >= as.POSIXct("2023-05-28 20:45:05") & gps_data$datetime <= as.POSIXct("2023-05-28 20:56:10") |
  gps_data$datetime >= as.POSIXct("2023-05-29 17:40:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:59:50") | 
  gps_data$datetime >= as.POSIXct("2023-05-29 18:19:55") & gps_data$datetime <= as.POSIXct("2023-05-29 18:27:35") | 
  gps_data$datetime >= as.POSIXct("2023-05-29 18:28:35") & gps_data$datetime <= as.POSIXct("2023-05-29 18:36:00") | 
  gps_data$datetime >= as.POSIXct("2023-05-29 18:56:35") & gps_data$datetime <= as.POSIXct("2023-05-29 19:09:30") |
  gps_data$datetime >= as.POSIXct("2023-05-30 17:31:10") & gps_data$datetime <= as.POSIXct("2023-05-30 17:42:50") |
  gps_data$datetime >= as.POSIXct("2023-05-30 19:04:55") & gps_data$datetime <= as.POSIXct("2023-05-30 19:48:50") |
  gps_data$datetime >= as.POSIXct("2023-05-31 17:08:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:09:40") |
  gps_data$datetime >= as.POSIXct("2023-05-31 17:34:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:42:25") |
  gps_data$datetime >= as.POSIXct("2023-06-02 13:29:10") & gps_data$datetime <= as.POSIXct("2023-06-02 13:30:45") |
  gps_data$datetime >= as.POSIXct("2023-06-02 13:36:25") & gps_data$datetime <= as.POSIXct("2023-06-02 13:50:55") |
  gps_data$datetime >= as.POSIXct("2023-06-02 14:55:10") & gps_data$datetime <= as.POSIXct("2023-06-02 15:13:10") |
  gps_data$datetime >= as.POSIXct("2023-06-04 16:58:15") & gps_data$datetime <= as.POSIXct("2023-06-04 17:00:00") |
  gps_data$datetime >= as.POSIXct("2023-06-04 17:09:20") & gps_data$datetime <= as.POSIXct("2023-06-04 17:14:10") |
  gps_data$datetime >= as.POSIXct("2023-06-04 18:18:35") & gps_data$datetime <= as.POSIXct("2023-06-04 18:22:25") |
  gps_data$datetime >= as.POSIXct("2023-06-04 18:48:20") & gps_data$datetime <= as.POSIXct("2023-06-04 19:03:35") |
  gps_data$datetime >= as.POSIXct("2023-06-05 15:37:15") & gps_data$datetime <= as.POSIXct("2023-06-05 15:40:15") |
  gps_data$datetime >= as.POSIXct("2023-06-09 13:51:50") & gps_data$datetime <= as.POSIXct("2023-06-09 13:53:45") |
  gps_data$datetime >= as.POSIXct("2023-06-10 18:35:10") & gps_data$datetime <= as.POSIXct("2023-06-10 18:48:30") |
  gps_data$datetime >= as.POSIXct("2023-06-10 20:48:30") & gps_data$datetime <= as.POSIXct("2023-06-10 21:01:00") |
  gps_data$datetime >= as.POSIXct("2023-06-12 19:07:00") & gps_data$datetime <= as.POSIXct("2023-06-12 19:14:45") |
  gps_data$datetime >= as.POSIXct("2023-06-12 19:43:20") & gps_data$datetime <= as.POSIXct("2023-06-12 19:45:40")



condition_walk <- 
  gps_data$datetime >= as.POSIXct("2023-04-21 16:13:40") & gps_data$datetime <= as.POSIXct("2023-04-21 16:16:25") |
  gps_data$datetime >= as.POSIXct("2023-04-25 15:13:05") & gps_data$datetime <= as.POSIXct("2023-04-25 15:18:00") |
  gps_data$datetime >= as.POSIXct("2023-04-25 15:37:00") & gps_data$datetime <= as.POSIXct("2023-04-25 15:40:15") |
  gps_data$datetime >= as.POSIXct("2023-04-25 19:05:05") & gps_data$datetime <= as.POSIXct("2023-04-25 19:08:25") |
  gps_data$datetime >= as.POSIXct("2023-04-25 20:32:45") & gps_data$datetime <= as.POSIXct("2023-04-25 20:35:05") |
  gps_data$datetime >= as.POSIXct("2023-04-29 21:41:45") & gps_data$datetime <= as.POSIXct("2023-04-29 22:19:40") |
  gps_data$datetime >= as.POSIXct("2023-04-30 18:20:25") & gps_data$datetime <= as.POSIXct("2023-04-30 19:39:00") |
  gps_data$datetime >= as.POSIXct("2023-05-01 13:31:50") & gps_data$datetime <= as.POSIXct("2023-05-01 19:22:20") | 
  gps_data$datetime >= as.POSIXct("2023-05-01 19:31:55") & gps_data$datetime <= as.POSIXct("2023-05-01 19:36:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 14:06:10") & gps_data$datetime <= as.POSIXct("2023-05-06 14:09:40") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 16:18:45") & gps_data$datetime <= as.POSIXct("2023-05-06 16:21:30") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 16:23:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:26:25") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 16:33:50") & gps_data$datetime <= as.POSIXct("2023-05-06 16:34:40") | 
  gps_data$datetime >= as.POSIXct("2023-05-06 17:29:25") & gps_data$datetime <= as.POSIXct("2023-05-06 17:32:15") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 17:34:55") & gps_data$datetime <= as.POSIXct("2023-05-07 18:05:05") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 18:30:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:42:30") | 
  gps_data$datetime >= as.POSIXct("2023-05-07 20:13:00") & gps_data$datetime <= as.POSIXct("2023-05-07 20:15:35") | 
  gps_data$datetime >= as.POSIXct("2023-05-08 18:56:05") & gps_data$datetime <= as.POSIXct("2023-05-08 19:21:30") | 
  gps_data$datetime >= as.POSIXct("2023-05-08 19:53:55") & gps_data$datetime <= as.POSIXct("2023-05-08 20:14:55") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 17:10:10") & gps_data$datetime <= as.POSIXct("2023-05-10 17:14:10") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 17:28:50") & gps_data$datetime <= as.POSIXct("2023-05-10 17:52:00") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 18:11:15") & gps_data$datetime <= as.POSIXct("2023-05-10 18:14:00") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 19:36:45") & gps_data$datetime <= as.POSIXct("2023-05-10 19:42:45") | 
  gps_data$datetime >= as.POSIXct("2023-05-10 22:56:30") & gps_data$datetime <= as.POSIXct("2023-05-10 23:04:50") | 
  gps_data$datetime >= as.POSIXct("2023-05-12 07:52:05") & gps_data$datetime <= as.POSIXct("2023-05-12 07:54:35") |
  gps_data$datetime >= as.POSIXct("2023-05-12 17:28:25") & gps_data$datetime <= as.POSIXct("2023-05-12 17:30:45") |
  gps_data$datetime >= as.POSIXct("2023-05-12 18:25:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:28:20") |
  gps_data$datetime >= as.POSIXct("2023-05-13 17:19:15") & gps_data$datetime <= as.POSIXct("2023-05-13 17:39:00") |
  gps_data$datetime >= as.POSIXct("2023-05-13 17:41:40") & gps_data$datetime <= as.POSIXct("2023-05-13 17:45:25") |
  gps_data$datetime >= as.POSIXct("2023-05-16 17:44:00") & gps_data$datetime <= as.POSIXct("2023-05-16 17:47:00") |
  gps_data$datetime >= as.POSIXct("2023-05-16 17:51:20") & gps_data$datetime <= as.POSIXct("2023-05-16 17:54:05") |
  gps_data$datetime >= as.POSIXct("2023-05-17 15:44:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:48:05") |
  gps_data$datetime >= as.POSIXct("2023-05-18 14:21:00") & gps_data$datetime <= as.POSIXct("2023-05-18 14:40:35") |
  gps_data$datetime >= as.POSIXct("2023-05-18 14:57:00") & gps_data$datetime <= as.POSIXct("2023-05-18 15:56:00") |
  gps_data$datetime >= as.POSIXct("2023-05-18 16:11:45") & gps_data$datetime <= as.POSIXct("2023-05-18 16:14:30") |
  gps_data$datetime >= as.POSIXct("2023-05-20 16:39:20") & gps_data$datetime <= as.POSIXct("2023-05-20 16:41:00") |
  gps_data$datetime >= as.POSIXct("2023-05-20 17:09:00") & gps_data$datetime <= as.POSIXct("2023-05-20 20:30:00") |
  gps_data$datetime >= as.POSIXct("2023-05-20 20:49:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:53:35") |
  gps_data$datetime >= as.POSIXct("2023-05-21 19:37:35") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:05") |
  gps_data$datetime >= as.POSIXct("2023-05-21 20:48:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:52:20") |
  gps_data$datetime >= as.POSIXct("2023-05-23 17:34:05") & gps_data$datetime <= as.POSIXct("2023-05-23 17:36:25") |
  gps_data$datetime >= as.POSIXct("2023-05-23 17:58:45") & gps_data$datetime <= as.POSIXct("2023-05-23 19:16:00") |
  gps_data$datetime >= as.POSIXct("2023-05-23 19:39:10") & gps_data$datetime <= as.POSIXct("2023-05-23 19:42:10") |
  gps_data$datetime >= as.POSIXct("2023-05-24 09:12:05") & gps_data$datetime <= as.POSIXct("2023-05-24 09:17:20") |
  gps_data$datetime >= as.POSIXct("2023-05-24 14:50:15") & gps_data$datetime <= as.POSIXct("2023-05-24 14:52:20") |
  gps_data$datetime >= as.POSIXct("2023-05-24 15:42:05") & gps_data$datetime <= as.POSIXct("2023-05-24 16:38:35") |
  gps_data$datetime >= as.POSIXct("2023-05-25 14:35:55") & gps_data$datetime <= as.POSIXct("2023-05-25 14:40:25") |
  gps_data$datetime >= as.POSIXct("2023-05-25 14:55:35") & gps_data$datetime <= as.POSIXct("2023-05-25 14:57:05") |
  gps_data$datetime >= as.POSIXct("2023-05-25 15:14:50") & gps_data$datetime <= as.POSIXct("2023-05-25 15:19:15") |
  gps_data$datetime >= as.POSIXct("2023-05-25 15:29:00") & gps_data$datetime <= as.POSIXct("2023-05-25 15:35:35") |
  gps_data$datetime >= as.POSIXct("2023-05-25 20:06:20") & gps_data$datetime <= as.POSIXct("2023-05-25 20:52:00") |
  gps_data$datetime >= as.POSIXct("2023-05-26 07:51:15") & gps_data$datetime <= as.POSIXct("2023-05-26 07:54:15") |
  gps_data$datetime >= as.POSIXct("2023-05-26 08:47:15") & gps_data$datetime <= as.POSIXct("2023-05-26 12:17:30") |
  gps_data$datetime >= as.POSIXct("2023-05-26 13:31:40") & gps_data$datetime <= as.POSIXct("2023-05-26 13:49:00") |
  gps_data$datetime >= as.POSIXct("2023-05-26 14:27:10") & gps_data$datetime <= as.POSIXct("2023-05-26 17:20:10") |
  gps_data$datetime >= as.POSIXct("2023-05-28 15:39:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:19:00") |
  gps_data$datetime >= as.POSIXct("2023-05-28 20:57:05") & gps_data$datetime <= as.POSIXct("2023-05-28 21:00:00") |
  gps_data$datetime >= as.POSIXct("2023-05-29 17:05:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:40:00") |
  gps_data$datetime >= as.POSIXct("2023-05-29 18:13:30") & gps_data$datetime <= as.POSIXct("2023-05-29 18:18:45") |
  gps_data$datetime >= as.POSIXct("2023-05-30 11:33:25") & gps_data$datetime <= as.POSIXct("2023-05-30 11:37:15") |
  gps_data$datetime >= as.POSIXct("2023-05-31 17:44:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:48:00") |
  gps_data$datetime >= as.POSIXct("2023-06-02 13:23:45") & gps_data$datetime <= as.POSIXct("2023-06-02 13:28:00") |
  gps_data$datetime >= as.POSIXct("2023-06-02 13:55:35") & gps_data$datetime <= as.POSIXct("2023-06-02 14:19:00") |
  gps_data$datetime >= as.POSIXct("2023-06-02 14:39:30") & gps_data$datetime <= as.POSIXct("2023-06-02 14:47:05") |
  gps_data$datetime >= as.POSIXct("2023-06-02 15:16:45") & gps_data$datetime <= as.POSIXct("2023-06-02 15:20:25") |
  gps_data$datetime >= as.POSIXct("2023-06-04 17:00:50") & gps_data$datetime <= as.POSIXct("2023-06-04 17:06:30") |
  gps_data$datetime >= as.POSIXct("2023-06-04 17:17:20") & gps_data$datetime <= as.POSIXct("2023-06-04 18:17:00") |
  gps_data$datetime >= as.POSIXct("2023-06-04 19:04:45") & gps_data$datetime <= as.POSIXct("2023-06-04 19:08:00") |
  gps_data$datetime >= as.POSIXct("2023-06-05 12:51:35") & gps_data$datetime <= as.POSIXct("2023-06-05 12:56:40") |
  gps_data$datetime >= as.POSIXct("2023-06-05 14:49:00") & gps_data$datetime <= as.POSIXct("2023-06-05 14:55:25") |
  gps_data$datetime >= as.POSIXct("2023-06-05 15:28:00") & gps_data$datetime <= as.POSIXct("2023-06-05 15:36:00") |
  gps_data$datetime >= as.POSIXct("2023-06-05 15:42:40") & gps_data$datetime <= as.POSIXct("2023-06-05 15:46:05") |
  gps_data$datetime >= as.POSIXct("2023-06-06 20:00:05") & gps_data$datetime <= as.POSIXct("2023-06-06 20:34:25") |
  gps_data$datetime >= as.POSIXct("2023-06-08 20:55:10") & gps_data$datetime <= as.POSIXct("2023-06-08 21:22:10") |
  gps_data$datetime >= as.POSIXct("2023-06-09 13:54:10") & gps_data$datetime <= as.POSIXct("2023-06-09 17:14:20") |
  gps_data$datetime >= as.POSIXct("2023-06-10 18:50:20") & gps_data$datetime <= as.POSIXct("2023-06-10 20:45:00") |
  gps_data$datetime >= as.POSIXct("2023-06-10 21:03:15") & gps_data$datetime <= as.POSIXct("2023-06-10 21:05:40") |
  gps_data$datetime >= as.POSIXct("2023-06-12 21:29:35") & gps_data$datetime <= as.POSIXct("2023-06-12 22:11:45") |
  gps_data$datetime >= as.POSIXct("2023-06-15 21:43:25") & gps_data$datetime <= as.POSIXct("2023-06-15 21:45:00") |
  gps_data$datetime >= as.POSIXct("2023-06-15 21:52:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:55:05") |
  gps_data$datetime >= as.POSIXct("2023-06-15 21:58:05") & gps_data$datetime <= as.POSIXct("2023-06-15 21:59:55") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:02:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:07:15") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:14:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:19:00") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:25:25") & gps_data$datetime <= as.POSIXct("2023-06-15 22:28:20") |
  gps_data$datetime >= as.POSIXct("2023-06-16 10:17:45") & gps_data$datetime <= as.POSIXct("2023-06-16 10:21:25") 



condition_boat <- 
  gps_data$datetime >= as.POSIXct("2023-06-02 14:20:00") & gps_data$datetime <= as.POSIXct("2023-06-02 14:37:20")



condition_run <- 
  gps_data$datetime >= as.POSIXct("2023-05-21 20:24:10") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:55") |
  gps_data$datetime >= as.POSIXct("2023-06-08 20:59:00") & gps_data$datetime <= as.POSIXct("2023-06-08 21:17:55") |
  gps_data$datetime >= as.POSIXct("2023-06-15 21:45:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:52:00") |
  gps_data$datetime >= as.POSIXct("2023-06-15 21:55:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:57:25") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:00:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:01:55") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:07:40") & gps_data$datetime <= as.POSIXct("2023-06-15 22:14:40") |
  gps_data$datetime >= as.POSIXct("2023-06-15 22:19:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:20:55")



# Labelizing data
gps_data <- gps_data %>%
  mutate(tmode_manual = case_when(condition_walk ~ 'walk',
                                  condition_tram ~ 'tram',
                                  condition_bus ~ 'bus',
                                  condition_boat ~ 'boat',
                                  condition_run ~ 'run',
                                  is.na(gps_data$tmode_manual) == TRUE ~ 'unclassified'
                                 ))
  • overview …

Merge Posmo and GPS tracker

  • Compute new variables: timelag, steplength, speed, acceleration, slope, etc.
  • remove statics (ex.3)
  • Add segment IDs (ex.3 - task 4)
#here merge with gps tracker
mvmt_data <- rbind(posmo, gps_data)
unique(mvmt_data$tmode_manual)
##  [1] "unclassified" "bus"          "train"        "tram"         "car"         
##  [6] "walk"         "cable_car"    "ski_lift"     "ski"          "t_bar"       
## [11] "bike"         "kick-scooter" "boat"         "plane"        "run"
col_order <- c("source", "datetime", "tmode_manual" ,"E", "N", "H", "geometry")
mvmt_data <- mvmt_data[, col_order]
mvmt_data
## Simple feature collection with 74871 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2508048 ymin: 1106915 xmax: 2696117 ymax: 1266310
## Projected CRS: CH1903+ / LV95
## # A tibble: 74,871 × 7
##    source datetime            tmode_manual        E        N     H
##    <chr>  <dttm>              <chr>           <dbl>    <dbl> <dbl>
##  1 posmo  2023-03-30 17:40:56 unclassified 2556120. 1202625.  490.
##  2 posmo  2023-03-30 17:41:05 unclassified 2555910. 1202492.  493.
##  3 posmo  2023-03-30 17:41:15 unclassified 2555629. 1202324.  495.
##  4 posmo  2023-03-30 17:41:32 unclassified 2555166. 1202092.  489.
##  5 posmo  2023-03-30 17:41:41 unclassified 2554897. 1201957.  493.
##  6 posmo  2023-03-30 17:45:33 unclassified 2550265. 1194674.  437.
##  7 posmo  2023-03-30 17:45:42 unclassified 2549951. 1194430.  449.
##  8 posmo  2023-03-30 17:45:42 unclassified 2549891. 1194429.  452.
##  9 posmo  2023-03-30 17:45:42 unclassified 2549951. 1194430.  449.
## 10 posmo  2023-03-30 17:45:51 unclassified 2549592. 1194179.  449.
## # ℹ 74,861 more rows
## # ℹ 1 more variable: geometry <POINT [m]>

Overview of Posmo and GPS tracker datasets:

Map general overview - two users:

#map overview
mvmt_data_wgs <- mvmt_data %>% st_transform(crs = 4326)

m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addCircleMarkers(data = mvmt_data_wgs,
                   #opacity = 0.3, 
                   radius = 0.2,
                   popup = mvmt_data_wgs$datetime,
                   color = 'blue') %>%
  addLegend(position = 'topright', 
            colors = c('blue'), 
            labels = c('points'),
            title = 'All points from GPS tracker and posmo')

# Plot the leaflet object m
m

Map general overview - labelized segments:

#map overview
mvmt_data_wgs <- mvmt_data %>% st_transform(crs = 4326) %>% filter(tmode_manual != "classified")



# check overlapping days of labelized data
# mvmt_data_wgs %>% 
#   mutate(
#     day = format(as.POSIXct(datetime), format = "%Y-%m-%d")) %>%
#   group_by(day, source) %>% 
#   summarize(n()) %>% View()


#Palette
factpal <- colorFactor(topo.colors(2), domain =mvmt_data_wgs$source)

m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addCircleMarkers(data = mvmt_data_wgs,
                   #opacity = 0.3, 
                   radius = 0.2,
                   popup = mvmt_data_wgs$datetime,
                   color = ~factpal(source)) %>%
                   #color = 'blue') %>%
  addLegend(position = 'topright', 
            #colors = c('blue'), 
            #labels = c('points'),
            pal = factpal,
            values = mvmt_data_wgs$source,
            title = 'Labelized points from GPS tracker and posmo')

# Plot the leaflet object m
m

Map general overview - posmo user:

Map general overview - GPS tracker user:

Map general overview - per region (?):

Raw trajectory derivatives

Here we will compute all necessary values to determine mean of transport:

#compute trajectory derivatives

mvmt_data <- 
  mvmt_data %>%
  group_by(source) %>%
  mutate(
    timelag = as.numeric(difftime(lead(datetime), datetime)),
    steplength_m = sqrt((E-lead(E))^2 + (N-lead(N))^2), # Horizontal steplength 
    steplength_h = H-lead(H), # Add vertical steplength
    speed_ms = abs(steplength_m)/timelag,
    speed_v = abs(steplength_h)/timelag,
    slope = (lead(H) - H)/steplength_m * 100
    )

#remove timelag 0 -> recording errors
mvmt_data <- mvmt_data %>% filter(timelag != 0)
# add regions
#Zurich
zh_ll <- c(2674900, 1244000)
zh_ur <- c(2692100, 1260300)

#Waadt
wt_ll <- c(2480000, 1127600)
wt_ur <- c(2575200, 1195100)

#wallis
ws_ll <- c(2538000, 1076875)
ws_ur <- c(2661500, 1153125)

mvmt_data$region <- NA
mvmt_data <- mvmt_data |> 
  mutate(
    region = case_when(
      E > zh_ll[1] & E < zh_ur[1] & N > zh_ll[2] & N < zh_ur[2] ~ 'Zurich', 
      E > wt_ll[1] & E < wt_ur[1] & N > wt_ll[2] & N < wt_ur[2] ~ 'Waadt',
      E > ws_ll[1] & E < ws_ur[1] & N > ws_ll[2] & N < ws_ur[2] ~ 'Wallis'
      )
    )

Segmentize

  • remove statics (ex.3)
  • Add segment IDs (ex.3 - task 4) /! we could use the static points for means of transport. …

posmo sampling rate: 10 sec GPS sampling rate: 5 sec

  1. SAMPLE DATA FOR TRYING IT OUT
    Point to drop when we are done with the tests.
#try on a sampled data
mvmt_data2 <- mvmt_data #|> filter(as.Date(datetime) == "2023-04-12")
  #filter(as.Date(datetime) == "2023-05-04")
  #filter(tmode_manual == "train")
mvmt_data2
## Simple feature collection with 73025 features and 13 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2508048 ymin: 1106915 xmax: 2696117 ymax: 1256849
## Projected CRS: CH1903+ / LV95
## # A tibble: 73,025 × 14
## # Groups:   source [2]
##    source datetime            tmode_manual        E        N     H
##  * <chr>  <dttm>              <chr>           <dbl>    <dbl> <dbl>
##  1 posmo  2023-03-30 17:40:56 unclassified 2556120. 1202625.  490.
##  2 posmo  2023-03-30 17:41:05 unclassified 2555910. 1202492.  493.
##  3 posmo  2023-03-30 17:41:15 unclassified 2555629. 1202324.  495.
##  4 posmo  2023-03-30 17:41:32 unclassified 2555166. 1202092.  489.
##  5 posmo  2023-03-30 17:41:41 unclassified 2554897. 1201957.  493.
##  6 posmo  2023-03-30 17:45:33 unclassified 2550265. 1194674.  437.
##  7 posmo  2023-03-30 17:45:42 unclassified 2549951. 1194430.  449.
##  8 posmo  2023-03-30 17:45:51 unclassified 2549592. 1194179.  449.
##  9 posmo  2023-03-30 17:47:15 unclassified 2547601. 1191098.  480.
## 10 posmo  2023-03-30 17:54:38 unclassified 2539084. 1181463.  434.
## # ℹ 73,015 more rows
## # ℹ 8 more variables: geometry <POINT [m]>, timelag <dbl>, steplength_m <dbl>,
## #   steplength_h <dbl>, speed_ms <dbl>, speed_v <dbl>, slope <dbl>,
## #   region <chr>
  1. Detect gaps in recording
#determine static points: choose an average distance to use as the threshold
mvmt_data_gap <- mvmt_data2 |>
  ungroup() |>
  mutate(gap = timelag > 30)


#Visualisation of gap / no gap
visu_gap_wgs <- mvmt_data_gap |> filter(gap) %>% st_transform(crs = 4326)
visu_no_gap_wgs <- mvmt_data_gap |> filter(!gap) %>% st_transform(crs = 4326)

m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addCircleMarkers(data = visu_no_gap_wgs,
                   radius = 0.2,
                   popup = paste(
                     " steplength: ", round(visu_no_gap_wgs$steplength_m,2),
                     " timelag: ", visu_no_gap_wgs$timelag,
                     " speed_ms: ", round(visu_no_gap_wgs$speed_ms,2),
                     " source: ", visu_no_gap_wgs$source
                     ),
                   color = 'blue') %>%
  addCircleMarkers(data = visu_gap_wgs,
                   radius = 0.2,
                   popup = paste(
                     " steplength: ", round(visu_gap_wgs$steplength_m,2),
                     " timelag: ", visu_gap_wgs$timelag,
                     " speed_ms: ", round(visu_gap_wgs$speed_ms,2),
                     " source: ", visu_gap_wgs$source
                     ),
                   color = 'red') %>%
  addLegend(position = 'topright', 
            colors = c('blue', 'red'), 
            labels = c('no gap', 'gap'),
            title = 'Gap in the data')

# Plot the leaflet object m
m
  1. Segmentation per gap, user and transport mode
#function source: Exercice3
rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

mvmt_data_segmentized <- mvmt_data_gap %>%
  mutate(
    segment_ID_user = rle_id(source),
    segment_ID_gap = 1, #to split segments when recording gap in it
    segment_ID_tmode = rle_id(tmode_manual)
  )

#increment segment_ID_gap for each TRUE
for (i in 2:nrow(mvmt_data_segmentized)) {
  if (mvmt_data_segmentized$gap[i-1] == TRUE) {
    mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1] + 1
  } else {
    mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1]
  }
}

# ADD PRE-FINAL ID for segmentation
mvmt_data_segmentized <- mvmt_data_segmentized %>%
  mutate(
    segment_ID_both = paste(segment_ID_user, segment_ID_gap, segment_ID_tmode, sep = "-")
  )


# mvmt_data_segmentized <- mvmt_data_gap %>%
#   mutate(
#     segment_ID_user = rle_id(source),
#     segment_ID_gap = rle_id(gap), #to split segments when recording gap in it
#     segment_ID_tmode = rle_id(tmode_manual),
#     segment_ID_both = paste(segment_ID_user, segment_ID_gap, segment_ID_tmode, sep = "-")
#     #segment_ID_both = as.numeric(segment_ID_user) * as.numeric(segment_ID_gap) * as.numeric(segment_ID_tmode)
#   )

# Add duration per segment 
# change the unit from mins to secs
mvmt_data_segmentized <- mvmt_data_segmentized %>%
  group_by(segment_ID_both) %>%
  mutate(duration_secs = as.integer(difftime(max(datetime), min(datetime), units = "secs")))

#specify temporal window
#60 seconds ?
#/!\ ordered and by user
#mvmt_data <- mvmt_data %>% arrange(desc(datetime))


#meanstep
mvmt_data_segmentized <- mvmt_data_segmentized |>
  group_by(segment_ID_both) %>%
    mutate(
      nMinus5 = sqrt((lag(E, 5) - E)^2 + (lag(N, 5) - N)^2),
      nMinus4 = sqrt((lag(E, 4) - E)^2 + (lag(N, 4) - N)^2), 
      nMinus3 = sqrt((lag(E, 3) - E)^2 + (lag(N, 3) - N)^2), #dist to pos -30s
      nMinus2 = sqrt((lag(E, 2) - E)^2 + (lag(N, 2) - N)^2), #dist to pos -20s
      nMinus1 = sqrt((lag(E, 1) - E)^2 + (lag(N, 1) - N)^2), #dist to pos -10 sec
      nPlus1  = sqrt((E - lead(E, 1))^2 + (N - lead(N, 1))^2), #dist to pos +10 sec
      nPlus2  = sqrt((E - lead(E, 2))^2 + (N - lead(N, 2))^2), #dist to pos +20 sec
      nPlus3  = sqrt((E - lead(E, 3))^2 + (N - lead(N, 3))^2), #dist to pos +30 sec
      nPlus4  = sqrt((E - lead(E, 4))^2 + (N - lead(N, 4))^2), #dist to pos +30 sec
      nPlus5  = sqrt((E - lead(E, 5))^2 + (N - lead(N, 5))^2), #dist to pos +30 sec
    )
mvmt_data_segmentized <- mvmt_data_segmentized |>
    rowwise() |>
    mutate(
        stepMean_smallw = round(mean(c(nMinus1, nPlus1)),2),
        stepMean_midw = round(mean(c(nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3)),2),
        stepMean_bigw = round(mean(c(nMinus5, nMinus4, nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3, nPlus4, nPlus5)),2)
    ) |>
    ungroup()


# # MAP VISUALISATION
# n_segs <- mvmt_data_segmentized$segment_ID_both %>% unique() %>% length()
# mvmt_data_wgs <- mvmt_data_segmentized %>% st_transform(crs = 4326)
# 
# set.seed(2)
# factpal <- colorFactor(sample(rainbow(n_segs)), domain =mvmt_data_wgs$segment_ID_both)
# 
# m <- leaflet() %>%
#   #addTiles() %>%  # Add default OpenStreetMap map tiles
#   addProviderTiles("Esri.WorldGrayCanvas") %>%
#   addCircleMarkers(data = mvmt_data_wgs,
#                    #opacity = 0.3,
#                    radius = 0.2,
#                    popup = paste("seg_ID: ", mvmt_data_wgs$segment_ID_both),
#                    color = ~factpal(segment_ID_both)) %>%
#   addLegend(position = 'topright',
#             #colors = c('red'),
#             #labels = c('static'),
#             pal = factpal,
#             values = mvmt_data_wgs$segment_ID_both,
#             title = 'Segments')
# 
# # Plot the leaflet object m
# m
  1. Add statics info
#determine static points: choose an average distance to use as the threshold

mvmt_data_add_static <- mvmt_data_segmentized |>
  group_by(segment_ID_both) %>%
  #ungroup() |>
  mutate(
    #stepMean = ifelse(is.na(stepMean), 500, stepMean), # first and last points of window
    static = 
           #timelag < 5.0
           #(timelag_mean > 1.0 | timelag_mean < -1.0) 
           #timelag_mean != 0.0 | 
           #is.na(timelag_mean)
           #speed_ms_mean < 0.02 & speed_ms_mean > -0.02
           #stepMean_bigw < mean(stepMean_smallw, na.rm=TRUE),
           # Optimize by adding 'or' conditions
           stepMean_bigw < mean(stepMean_smallw, na.rm=TRUE) | 
           steplength_m < 0.5,
           #stepMean < mean(steplength_m, na.rm=TRUE),
           #mean(steplength_m, na.rm = TRUE)
    static =  ifelse(is.na(static), FALSE, static)
           )

# Add percentage of static in each segment
mvmt_data_add_static <- mvmt_data_add_static |>
  group_by(segment_ID_both) %>%
  mutate(
    static_percent = (sum(static))/(length(static))
    # Optimize by only considering continuous static points
    )


#mvmt_data %>% group_by(source) %>% summarize(mean = mean(stepMean)) %>% View()


# #Visualisation of static / non static:
# visu_static_wgs <- mvmt_data_add_static |> filter(static) %>% st_transform(crs = 4326)
# 
# visu_no_static_wgs <- mvmt_data_add_static |> filter(!static) %>% st_transform(crs = 4326)
# 
# m <- leaflet() %>%
#   addTiles() %>%  # Add default OpenStreetMap map tiles
#   addCircleMarkers(data = visu_no_static_wgs,
#                    #opacity = 0.3,
#                    radius = 0.2,
#                    popup = paste(
#                      "step_mean: ", visu_no_static_wgs$stepMean_midw,
#                      " steplength: ", round(visu_no_static_wgs$steplength_m,2),
#                      " timelag: ", visu_no_static_wgs$timelag,
#                      " speed_ms: ", round(visu_no_static_wgs$speed_ms,2),
#                      " source: ", visu_no_static_wgs$source
#                      ),
#                    color = 'blue') %>%
#   addCircleMarkers(data = visu_static_wgs,
#                    #opacity = 0.3,
#                    radius = 0.2,
#                    popup = paste(
#                      "step_mean: ", visu_static_wgs$stepMean_midw,
#                      " steplength: ", round(visu_static_wgs$steplength_m,2),
#                      " timelag: ", visu_static_wgs$timelag,
#                      " speed_ms: ", round(visu_static_wgs$speed_ms,2),
#                      " source: ", visu_static_wgs$source
#                      ),
#                    color = 'red') %>%
#   addLegend(position = 'topright', 
#             colors = c('blue', 'red'), 
#             labels = c('non static', 'static'),
#             title = 'static vs non static')
# 
# # Plot the leaflet object m
# m

# visu_no_static |>
#     ggplot(aes(E, N)) +
#     geom_path() +
#     geom_point() +
#     coord_fixed() +
#     theme(legend.position = "bottom")
  1. Remove statics and too small segments
#mvmt_data_add_static %>% dplyr::select(c("segment_ID_static", "segment_ID_tmode", "segment_ID_user", "source", "segment_ID_gap", "segment_ID_both")) %>% st_drop_geometry() %>% unique() %>% View()

#mvmt_data %>% dplyr::select(c("segment_ID_static", "segment_ID_tmode")) %>% st_drop_geometry() %>% unique() %>% View()


# filter static and small segments
mvmt_data_filtered <- mvmt_data_add_static %>%
  group_by(segment_ID_both, source, tmode_manual, duration_secs, static, static_percent) %>%
  # remove static segments
  filter(static == FALSE) %>%
  #remove the too small segments (duration < 30 s)
  filter(duration_secs > 30)
# we should also filter segments with less than ~5 points (?)


# MAP VISUALISATION
# n_segs <- mvmt_data_filtered$segment_ID_both %>% unique() %>% length()
# mvmt_data_wgs <- mvmt_data_filtered %>% st_transform(crs = 4326)
# 
# set.seed(2)
# factpal <- colorFactor(sample(rainbow(n_segs)), domain =mvmt_data_wgs$segment_ID_both)
# 
# m <- leaflet() %>%
#   #addTiles() %>%  # Add default OpenStreetMap map tiles
#   addProviderTiles("Esri.WorldGrayCanvas") %>%
#   addCircleMarkers(data = mvmt_data_wgs,
#                    #opacity = 0.3,
#                    radius = 0.2,
#                    popup = paste("seg_ID: ", mvmt_data_wgs$segment_ID_both),
#                    color = ~factpal(segment_ID_both)) %>%
#   addLegend(position = 'topright', 
#             #colors = c('red'), 
#             #labels = c('static'),
#             pal = factpal,
#             values = mvmt_data_wgs$segment_ID_both,
#             title = 'Segments - filtered statics and small')
# 
# # Plot the leaflet object m
# m
  1. Resegmentize per duration
    (For Machine Learning)
mvmt_data_split <- mvmt_data_filtered %>% 
  group_by(segment_ID_both) %>%
  mutate(
    #remove timelag to next segment
    timelag = case_when(timelag > 30 ~ 0,
                        timelag <= -20 ~ 0,
                        .default = timelag),
    duration_index = case_when(
      (duration_secs > 100) & (duration_secs %% 100) >= 50 ~ ceiling(cumsum(timelag)/100),
      (duration_secs > 100) & (duration_secs %% 100) < 50 ~ ceiling((cumsum(timelag)/100)-1),
    .default = 0
    ),
    segment_ID_final = paste(segment_ID_both, duration_index, sep = "-")
  )

mvmt_data_split %>% View()
  1. final segmentization Note: static to improve.
mvmt_seg_final <- mvmt_data_split %>% 
  dplyr::select(-c("segment_ID_user", "segment_ID_both", "segment_ID_gap", "duration_index", "segment_ID_tmode", "nMinus5", "nMinus4", "nMinus3", "nMinus2", "nMinus1", "nPlus1", "nPlus2", "nPlus3", "nPlus4", "nPlus5"
))

Computing trajectory derivatives per segments

Compute trajectories derivatives for each segment –> to do after annotation.

# SUMMARIZE
mvmt_seg_summary <- mvmt_seg_final %>%
  group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
  summarize(
    # Total segments
    #time_duration = duration_secs,
    space_duration = sqrt((sum(abs(steplength_m))^2 + (sum(abs(steplength_h))^2))),
    stepMean_midw_mean = mean(stepMean_midw, na.rm=TRUE), 
    # Step length
    steplength_mean = mean(steplength_m),
    steplength_sd = sd(steplength_m),
    # Horizontal speed
    speed_h_mean = mean(speed_ms), 
    speed_h_sd = sd(speed_ms),
    speed_h_range = max(speed_ms) - min(speed_ms),
    # Vertical speed
    speed_v_mean = mean(speed_v),
    speed_v_sd = sd(speed_v),
    speed_v_range = max(speed_v) - min(speed_v),
    # Sinuosity
    # Acceleration
    # Geometry
    lst_geometry = list(geometry))
## `summarise()` has grouped output by 'segment_ID_final', 'static_percent',
## 'duration_secs'. You can override using the `.groups` argument.
mvmt_seg_summary %>% View()

Annotate trajectories with environment datasets

  • datasets descriptions
  • annotate trajectories

/! dissolve the shapefiles first makes the calculation quickest.

done: - check buffer distance with other examples - calculate % per segment to attributes FALSE/TRUE to the segment -> add it in mvmt_data_summarized

Q: -> faire convex-hull autour des points pour couper les datasets et limiter le tps de calcul? (sur R)

#ALL

# 1) RAILS
rails_shp <- read_sf(here(data_folder, "railways", "rails_train_dissolved.shp")) %>% st_zm() 
joined_data <- mvmt_seg_final %>% 
  st_join(rails_shp, join = st_is_within_distance, 30)

mvmt_seg_final$isClosetoRails <- !is.na(joined_data$xtf_id)

# 2) TRAMS
trams_shp <- read_sf(here(data_folder, "railways", "trams_dissolved.shp")) %>% st_zm() 
joined_data <- mvmt_seg_final %>% 
  st_join(trams_shp, join = st_is_within_distance, 20)

mvmt_seg_final$isClosetoTrams <- !is.na(joined_data$xtf_id)

# 3) BUS STOPS
bus_shp <- read_sf(here(data_folder, "stops", "bus_stops_dissolved.shp")) %>% st_zm() 
joined_data <- mvmt_seg_final %>%
  st_join(bus_shp, join = st_is_within_distance, 50)

mvmt_seg_final$isClosetoBus <- !is.na(joined_data$fid)

# 4) HIGHWAYS
roads_shp <- read_sf(here(data_folder, "roads", "roads_dissolved.shp")) %>% st_zm()
joined_data <- mvmt_seg_final %>%
  st_join(roads_shp, join = st_is_within_distance, 20)

mvmt_seg_final$isClosetoHighways <- !is.na(joined_data$UUID)

# 5) LAKES
lakes_shp <- read_sf(here(data_folder, "lakes", "large_lakes_dissolved.shp")) %>% st_zm() 
joined_data <- mvmt_seg_final %>%
  st_join(lakes_shp, join = st_is_within_distance, 1)

mvmt_seg_final$isClosetoLakes <- !is.na(joined_data$UUID)

# 6) SKI LIFTS AND CABLE CARS
cables_shp <- read_sf(here(data_folder, "ski_lift_cable_car", "skilift_cablecar.shp")) %>% st_zm()
joined_data <- mvmt_seg_final %>%
  st_join(cables_shp, join = st_is_within_distance, 10)

mvmt_seg_final$isClosetoCables <- !is.na(joined_data$UUID)

# 7) HIGH ALTITUDES
mvmt_seg_final <- mvmt_seg_final %>% mutate(
  isHighAltitude = case_when(H >= 1200.0 ~ TRUE,
                             .default = FALSE)
)
# SUMMARIZE ANNOTATIONS
mvmt_seg_annotations_summarized <- mvmt_seg_final %>%
  group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
  summarize(
    isClosetoLakes =  (sum(isClosetoLakes) / n() * 100 ) > 0,
    isClosetoRails =  (sum(isClosetoRails) / n() * 100 ) > 70.0,
    isClosetoBus =  (sum(isClosetoBus) / n() * 100 ) > 30.0,
    isClosetoTrams =  (sum(isClosetoTrams) / n() * 100 ) > 70.0,
    isClosetoHighways =  (sum(isClosetoHighways) / n() * 100 ) > 80.0,
    isClosetoCables =  (sum(isClosetoCables) / n() * 100 ) > 80.0,
    isHighAltitude = (sum(isHighAltitude) / n() * 100 ) > 80.0
    )
## `summarise()` has grouped output by 'segment_ID_final', 'static_percent',
## 'duration_secs'. You can override using the `.groups` argument.
mvmt_seg_annotations_summarized %>% View()
#VISUALISATION

#Visualisation of proximity for rails:

# isClosetoRails
# isClosetoTrams
# isClosetoLakes
# isClosetoHighways
# isClosetoCables
# isHighAltitude

visu_true <- mvmt_seg_final |> filter(isClosetoRails) %>% st_transform(crs = 4326)
visu_false <- mvmt_seg_final |> filter(!isClosetoRails) %>% st_transform(crs = 4326)

wgs <- rails_shp %>% st_transform(crs = 4326)
#wgs <- trams_shp %>% st_transform(crs = 4326)
#wgs <- lakes_shp %>% st_transform(crs = 4326)
#wgs <- roads_shp %>% st_transform(crs = 4326)
#wgs <- cables_shp %>% st_transform(crs = 4326)
# ALTITUDE = NO WGS

m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addPolylines(
    data = wgs,
    color = 'green'
  ) %>%
  addCircleMarkers(data = visu_false,
                   radius = 0.2,
                   popup = paste(
                     "tmode: ", visu_false$tmode_manual, ". ID: ", visu_false$segment_ID_final
                     ),
                   color = 'red') %>%
  addCircleMarkers(data = visu_true,
                   radius = 0.2,
                   popup = paste(
                     "tmode: ",  visu_true$tmode_manual, ". ID: ", visu_false$segment_ID_final
                     ),
                   color = 'blue') %>%
  addLegend(position = 'topright', 
            colors = c('blue', 'red'), 
            labels = c('close', 'not close'),
            title = 'Proximity to environment')

# Plot the leaflet object m
m
#VISUALISATION FOR BUS

#VISU
bus_wgs <- read_sf(here(data_folder, "stops", "bus_stops.shp")) %>% st_zm() %>% st_transform(crs = 4326)

#Visualisation of proximity:
visu_true <- mvmt_seg_final |> filter(isClosetoBus) %>% st_transform(crs = 4326)
visu_false <- mvmt_seg_final |> filter(!isClosetoBus) %>% st_transform(crs = 4326)


m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addCircleMarkers(
    data = bus_wgs,
    color = 'green'
  ) %>%
  addCircleMarkers(data = visu_false,
                   radius = 0.2,
                   popup = paste(
                     "tmode: ", visu_false$tmode_manual, ". ID: ", visu_false$segment_ID_final
                     ),
                   color = 'red') %>%
  addCircleMarkers(data = visu_true,
                   radius = 0.2,
                   popup = paste(
                     "tmode: ",  visu_true$tmode_manual, ". ID: ", visu_false$segment_ID_final
                     ),
                   color = 'blue') %>%
  addLegend(position = 'topright', 
            colors = c('blue', 'red'), 
            labels = c('close', 'not close'),
            title = 'Proximity to environment')

# Plot the leaflet object m
m

Blabla to sort:

IMPORT highways: From swissTLM3D. We selected only highways and autostrasse, because on this road, only cars are allowed, which would be helpful to determine cars segments.

IMPORT LAKES:

Lakes have first been selected (“surface”>=10000000) for the ones that could have boats.Smaller likes could also have boats, but for calcualtion performances we only took the biggest lakes in our calculations. Then dissolved.

Ski lifts and cable cars:

BUS STOPS source: map.geoadmin.ch : Arrêts des transports publics (Office fédéral des transports OFT) sort all the stops which contains “bus” as Type of transport.

386 TRUE / 1374 total

alternative: get lines from OSM.

MOUNTAIN AREAS:
stations de basses altitude (Jura, …) Abbaye: 1000m Les Rasses: 1100m Tuffes: 1200m

Les pléaides: 1200m Villars-sur-Ollon: 1200m (?)

-> lets take 1200m

Summarize characteristics of the means of transport

  • Compute mean variables per segment (ex.2 task 5 (rolling windows)) -> summarize the variables by mean of transports

  • general overview for each mean of transports

Summary all segments from the mean of transport:

#count of segments:
tmode_count <- mvmt_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))

knitr::kable(tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport")
Number of points labelized per mode of transport
tmode_manual n
unclassified 33614
walk 13365
tram 8335
car 6167
train 3787
bus 2229
ski 1775
ski_lift 1148
bike 1096
plane 465
boat 283
run 245
cable_car 221
t_bar 182
kick-scooter 113
#maps per mean of transport?:

mvmt_data_filter <- mvmt_data %>% filter(tmode_manual == "plane") #%>% filter(!is.na(tmode_manual)) #%>% filter(region == "Zurich") 

mvmt_data_filter_wgs <- mvmt_data_filter %>% st_transform(crs = 4326)

m <- leaflet() %>%
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addCircleMarkers(data = mvmt_data_filter_wgs,
                   #opacity = 0.3, 
                   radius = 0.2,
                   popup = mvmt_data_filter_wgs$datetime,
                   color = 'blue') %>%
  addLegend(position = 'topright', 
            colors = c('blue'), 
            labels = c('points'),
            title = 'All segments labelized as (selected)')

# Plot the leaflet object m
m

Machine Learning

… ##### PREPARATION:

# Drop geometry

# # Merge all variables
mvmt_all <- merge(st_drop_geometry(mvmt_seg_summary),
                  st_drop_geometry(mvmt_seg_annotations_summarized),
                  by = c('segment_ID_final', 'tmode_manual',
                         'duration_secs', 'static_percent'))
# ## 1459 RECORDS
# 
# 
# # Save the file from steps above
# write.csv(dplyr::select(mvmt_all, -c("lst_geometry")), 
#            here::here(data_folder,"mvmt_all.csv"), row.names=FALSE)

# read csv
# mvmt_all <- read_csv(here(data_folder, "intermediate_results/mvmt_all.csv"))%>% as.data.frame()

# Filter the 'unclassified' records
mvmt_all <- mvmt_all %>% filter(tmode_manual != "unclassified")

## 583 RECORDS
# mvmt_all <- mvmt_all %>% filter(tmode_manual != "plane")

# Remove
mvmt_all <- na.omit(mvmt_all)
# Label travel mode
mvmt_all$label <- as.factor(mvmt_all$tmode_manual)
## 561 RECORDS

# Convert boolean to 0/1 value
mvmt_all$isClosetoLakes <- as.integer(as.logical(mvmt_all$isClosetoLakes))
mvmt_all$isClosetoRails <- as.integer(as.logical(mvmt_all$isClosetoRails))
mvmt_all$isClosetoBus <- as.integer(as.logical(mvmt_all$isClosetoBus))
mvmt_all$isClosetoTrams <- as.integer(as.logical(mvmt_all$isClosetoTrams))
mvmt_all$isClosetoHighways <- as.integer(as.logical(mvmt_all$isClosetoHighways))
mvmt_all$isClosetoCables <- as.integer(as.logical(mvmt_all$isClosetoCables))
mvmt_all$isHighAltitude <- as.integer(as.logical(mvmt_all$isHighAltitude))

# For machine learning, select factor columns only
mvmt_all <- mvmt_all %>% dplyr::select(c("duration_secs", "static_percent", "space_duration",
                                 "stepMean_midw_mean", "steplength_mean","steplength_sd", 
                                 "speed_h_mean", "speed_h_sd", "speed_h_range", "speed_v_mean",
                                 "speed_v_sd", "speed_v_range", "isClosetoLakes",
                                 "isClosetoRails", "isClosetoBus", "isClosetoTrams",     
                                 "isClosetoHighways", "isClosetoCables", "isHighAltitude","label"))
# unique(mvmt_all$label)

# Split into training and testing dataset
split <- sample.split(mvmt_all, SplitRatio = 0.3)
mvmt_all_test <- subset(mvmt_all, split==TRUE)
mvmt_all_train <- subset(mvmt_all, split==FALSE)

# For validation
# for 10-fold cross validation 
k_fold <- 10
n3 <- nrow(mvmt_all_train)
n_tail <- n3%/%k_fold
rnd_n <- runif(n3)
rank_n <- rank(rnd_n)
chunk <- (rank_n - 1)%/%n_tail + 1
chunk <- as.factor(chunk)
mvmt_all_train_chunk <- mvmt_all_train
mvmt_all_train_chunk$chunk <- chunk

KNN algorithms

# Build models with 3 parameter sets
for(m in 1:3){
  knn_train_cva <- numeric(0)
  knn_test_cva <- numeric(0)
  
  if (m == 1){
      kv= 5
    }
    if (m == 2){
      kv= 11
    }
    if (m == 3){
      kv= 50
    }
  
  for(i in 1:k_fold){
    set.seed(21)
    knn_train_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
                          test = dplyr::select(mvmt_all_train, -label),
                       cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
    knn_test_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
                       test = dplyr::select(mvmt_all_test, -label),
                       cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
    
    knn_train_cva <- rbind(knn_train_cva, Accuracy(knn_train_pred, mvmt_all_train$label))
    knn_test_cva <- rbind(knn_test_cva, Accuracy(knn_test_pred, mvmt_all_test$label))
  }
  print(knn_train_cva)
  print(knn_test_cva)

  if (m == 1){
    knn_train_pred1 <- knn_train_pred
    knn_test_pred1 <-  knn_test_pred
    knn_train_cva1 <- knn_train_cva
    knn_test_cva1 <- knn_test_cva 
    knn_train_cva_mean1<-mean(knn_train_cva)
    knn_test_cva_mean1<-mean(knn_test_cva)
    }
  if (m == 2){
    knn_train_pred2 <- knn_train_pred
    knn_test_pred2 <-  knn_test_pred
    knn_train_cva2 <- knn_train_cva
    knn_test_cva2 <- knn_test_cva 
    knn_train_cva_mean2<-mean(knn_train_cva)
    knn_test_cva_mean2<-mean(knn_test_cva)
    }
  if (m == 3){
    knn_train_pred3 <- knn_train_pred
    knn_test_pred3 <-  knn_test_pred
    knn_train_cva3 <- knn_train_cva
    knn_test_cva3 <- knn_test_cva 
    knn_train_cva_mean3<-mean(knn_train_cva)
    knn_test_cva_mean3<-mean(knn_test_cva)
    }
}
##            [,1]
##  [1,] 0.8464460
##  [2,] 0.8464460
##  [3,] 0.8426479
##  [4,] 0.8442756
##  [5,] 0.8464460
##  [6,] 0.8431905
##  [7,] 0.8507868
##  [8,] 0.8442756
##  [9,] 0.8453608
## [10,] 0.8404775
##            [,1]
##  [1,] 0.7949367
##  [2,] 0.7886076
##  [3,] 0.7860759
##  [4,] 0.7949367
##  [5,] 0.7860759
##  [6,] 0.7911392
##  [7,] 0.7974684
##  [8,] 0.7924051
##  [9,] 0.7924051
## [10,] 0.7936709
##            [,1]
##  [1,] 0.8068367
##  [2,] 0.8003256
##  [3,] 0.8003256
##  [4,] 0.7986978
##  [5,] 0.8057515
##  [6,] 0.8008681
##  [7,] 0.8030385
##  [8,] 0.8052089
##  [9,] 0.7932718
## [10,] 0.7997830
##            [,1]
##  [1,] 0.7683544
##  [2,] 0.7632911
##  [3,] 0.7556962
##  [4,] 0.7645570
##  [5,] 0.7632911
##  [6,] 0.7569620
##  [7,] 0.7607595
##  [8,] 0.7531646
##  [9,] 0.7582278
## [10,] 0.7670886
##            [,1]
##  [1,] 0.7189365
##  [2,] 0.7238199
##  [3,] 0.7254476
##  [4,] 0.7183939
##  [5,] 0.7227347
##  [6,] 0.7265328
##  [7,] 0.7205643
##  [8,] 0.7211069
##  [9,] 0.7232773
## [10,] 0.7254476
##            [,1]
##  [1,] 0.7126582
##  [2,] 0.7113924
##  [3,] 0.7151899
##  [4,] 0.7075949
##  [5,] 0.7177215
##  [6,] 0.7126582
##  [7,] 0.7139241
##  [8,] 0.7126582
##  [9,] 0.7151899
## [10,] 0.7088608
plot(mvmt_all$label)

ConfusionMatrix(knn_test_pred, mvmt_all_test$label)
##               y_pred
## y_true         bike boat bus cable_car car kick-scooter run ski ski_lift t_bar
##   bike            7    0   1         0   0            0   0   0        0     0
##   boat            2    0   0         0   0            0   0   0        0     0
##   bus             2    0   6         0   0            0   0   0        1     0
##   cable_car       0    0   0         0   0            0   0   0        2     0
##   car             0    0   1         0 127            0   0   0        0     0
##   kick-scooter    0    0   0         0   0            0   0   0        0     0
##   run             0    0   0         0   0            0   0   0        0     0
##   ski             2    0   4         0   1            0   0   0        2     0
##   ski_lift        0    0   0         0   0            0   0   0       23     0
##   t_bar           0    0   0         0   0            0   0   0        4     0
##   train           0    0   2         0  34            0   0   0        0     0
##   tram            4    0   4         0   2            0   0   0       12     0
##   walk            0    0   0         0   0            0   0   0        0     0
##               y_pred
## y_true         train tram walk
##   bike             0    7   10
##   boat             0    2    0
##   bus              1   35    0
##   cable_car        0    2    1
##   car             14    8    6
##   kick-scooter     0    1    3
##   run              0    0    2
##   ski              0   21    8
##   ski_lift         0    5    3
##   t_bar            0    1    4
##   train           60    2    1
##   tram             1   89   12
##   walk             0    0  248

Random Forest algorithm

# Build 3 models with different parameter sets
for(m in 1:3){ # Loop through the different models
  if (m == 1){
      ntree_value= 20}
  if (m == 2){
      ntree_value= 40}
  if (m == 3){
      ntree_value= 80}
  rf_train_cva <- numeric(0)
  rf_test_cva <- numeric(0)
  for(i in 1:k_fold){
    set.seed(21)
    rf <- randomForest(x = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk), 
                     y = mvmt_all_train_chunk[chunk != i, ]$label, 
                     importance=TRUE, proximity=TRUE, ntree = ntree_value) 
    rf_train_pred <- predict(rf, dplyr::select(mvmt_all_train, -label), type="class")
    rf_test_pred <- predict(rf, dplyr::select(mvmt_all_test, -label), type="class")
    rf_train_cva <- rbind(rf_train_cva, Accuracy(rf_train_pred, mvmt_all_train$label))
    rf_test_cva <- rbind(rf_test_cva, Accuracy(rf_test_pred, mvmt_all_test$label))
  }
  print(rf_train_cva)
  print(rf_test_cva)
  
  if (m == 1){
    rf_train_pred1 <- rf_train_pred
    rf_test_pred1 <-  rf_test_pred
    rf_train_cva1 <- rf_train_cva
    rf_test_cva1 <- rf_test_cva 
    rf_train_cva_mean1<-mean(rf_train_cva)
    rf_test_cva_mean1<-mean(rf_test_cva)
    }
  if (m == 2){
    rf_train_pred2 <- rf_train_pred
    rf_test_pred2 <-  rf_test_pred
    rf_train_cva2 <- rf_train_cva
    rf_test_cva2 <- rf_test_cva 
    rf_train_cva_mean2<-mean(rf_train_cva)
    rf_test_cva_mean2<-mean(rf_test_cva)
    }
  if (m == 3){
    rf_train_pred3 <- rf_train_pred
    rf_test_pred3 <-  rf_test_pred
    rf_train_cva3 <- rf_train_cva
    rf_test_cva3 <- rf_test_cva 
    rf_train_cva_mean3<-mean(rf_train_cva)
    rf_test_cva_mean3<-mean(rf_test_cva)
    }}
##            [,1]
##  [1,] 0.9940315
##  [2,] 0.9891481
##  [3,] 0.9929463
##  [4,] 0.9907759
##  [5,] 0.9951167
##  [6,] 0.9929463
##  [7,] 0.9924037
##  [8,] 0.9896907
##  [9,] 0.9907759
## [10,] 0.9956593
##            [,1]
##  [1,] 0.9379747
##  [2,] 0.9354430
##  [3,] 0.9303797
##  [4,] 0.9341772
##  [5,] 0.9303797
##  [6,] 0.9341772
##  [7,] 0.9405063
##  [8,] 0.9506329
##  [9,] 0.9291139
## [10,] 0.9367089
##            [,1]
##  [1,] 0.9945741
##  [2,] 0.9886055
##  [3,] 0.9929463
##  [4,] 0.9907759
##  [5,] 0.9951167
##  [6,] 0.9934889
##  [7,] 0.9940315
##  [8,] 0.9929463
##  [9,] 0.9924037
## [10,] 0.9956593
##            [,1]
##  [1,] 0.9443038
##  [2,] 0.9367089
##  [3,] 0.9341772
##  [4,] 0.9379747
##  [5,] 0.9341772
##  [6,] 0.9354430
##  [7,] 0.9379747
##  [8,] 0.9405063
##  [9,] 0.9392405
## [10,] 0.9354430
##            [,1]
##  [1,] 0.9940315
##  [2,] 0.9886055
##  [3,] 0.9945741
##  [4,] 0.9918611
##  [5,] 0.9956593
##  [6,] 0.9940315
##  [7,] 0.9929463
##  [8,] 0.9934889
##  [9,] 0.9929463
## [10,] 0.9956593
##            [,1]
##  [1,] 0.9443038
##  [2,] 0.9367089
##  [3,] 0.9379747
##  [4,] 0.9443038
##  [5,] 0.9417722
##  [6,] 0.9392405
##  [7,] 0.9417722
##  [8,] 0.9430380
##  [9,] 0.9367089
## [10,] 0.9354430
ConfusionMatrix(rf_test_pred, mvmt_all_test$label)
##               y_pred
## y_true         bike boat bus cable_car car kick-scooter run ski ski_lift t_bar
##   bike           22    0   2         0   0            0   0   0        0     0
##   boat            0    4   0         0   0            0   0   0        0     0
##   bus             5    0  33         0   1            0   0   0        0     0
##   cable_car       0    0   0         2   0            0   0   0        3     0
##   car             0    0   4         0 146            0   0   3        0     0
##   kick-scooter    2    0   0         0   0            2   0   0        0     0
##   run             0    0   0         0   0            0   1   0        0     0
##   ski             0    0   0         0   0            0   0  37        1     0
##   ski_lift        0    0   0         1   0            0   0   2       28     0
##   t_bar           0    0   0         0   0            0   0   0        6     3
##   train           0    0   0         0   3            0   0   0        0     0
##   tram            1    0   1         0   1            0   0   0        0     0
##   walk            0    0   0         0   0            0   1   0        0     0
##               y_pred
## y_true         train tram walk
##   bike             0    1    0
##   boat             0    0    0
##   bus              1    5    0
##   cable_car        0    0    0
##   car              0    0    3
##   kick-scooter     0    0    0
##   run              0    0    1
##   ski              0    0    0
##   ski_lift         0    0    0
##   t_bar            0    0    0
##   train           95    0    1
##   tram             0  120    1
##   walk             0    1  246

Results

Discussion

  • Answer the 4 RQ

Problems encoutered / challenges

  • posmo map matching

Limitations

Conclusion